# Loading
library("readxl")
file <- "../xlsx/estaciones-CA.xlsx"
sheets <- c("todas", "traffic", "traffic-urban", "traffic-urban-2020",
"traffic-suburban", "traffic-suburban-2020",
"ciudades-100000", "ciudades-100000-A")
# xlsx files
sites <- read_excel(file, sheet=sheets[8])
# Change categorical variables to factor
sites$Type <- as.factor(sites$Type)
sites$"Site area" <- as.factor(sites$"Site area")
#sites <- sites[sites$Data_end > "2020-10-01" & sites$"Población" > 150000, ]
head(sites)
suppressMessages(library(openair))
suppressMessages(library(ggplot2))
suppressMessages(library(lubridate))
suppressMessages(library(gridExtra))
suppressMessages(library(repr))
options(repr.plot.width=25,
repr.plot.height=10,
#repr.plot.pointsize=50,
repr.plot.family='serif'
)
if (file.exists("../xlsx/dataAQV.csv")) {
dataAQV <- read.csv("../xlsx/dataAQV.csv")
# Convert date to datetime format
dataAQV$date <- ymd_hms(dataAQV$date)
dataAQV$by.day <- ymd(dataAQV$by.day)
dataAQV$by.month <- ymd(dataAQV$by.month)
# Change categorical variables to factor
dataAQV$code <- as.factor(dataAQV$code)
dataAQV$variable <- as.factor(dataAQV$variable)
dataAQV$unit <- as.factor(dataAQV$unit)
}
head(dataAQV)
dataAQV <- importEurope(
site = sites$"Código estación"[c(1:5)],
year=2015:2020,
to_narrow=TRUE,
#meta=TRUE,
)
# Convert date to datetime format
dataAQV$date <- ymd_hms(dataAQV$date)
# Change categorical variables to factor
dataAQV$code <- as.factor(dataAQV$code)
dataAQV$variable <- as.factor(dataAQV$variable)
dataAQV$unit <- as.factor(dataAQV$unit)
pollutants <- c("no", "no2", "o3", "pm10")
for (pol in pollutants) {
for (est in levels(dataAQV$code)) {
if (!(pol %in% dataAQV$variable[dataAQV$code == est])
& length(dataAQV[dataAQV$date > "2020-03-01 00:00:00"
& dataAQV$code == est, ]) == 0) {
to_Delete <- which(dataAQV$code == est)
dataAQV <- dataAQV[-to_Delete, ]
break
}
}
}
print(sum(is.na(dataAQV[dataAQV$variable %in% pollutants, ]$value)))
head(dataAQV)
preCOVID <- ggplot(data = dataAQV[dataAQV$date < "2020-03-01 00:00:00"
& dataAQV$variable %in% pollutants
, ],
aes(x = date, y = value, color=variable)
) +
geom_line() +
geom_line() +
facet_wrap(~code, ncol=1)
COVID <- ggplot(data = dataAQV[dataAQV$date > "2020-03-01 00:00:00"
& dataAQV$variable %in% pollutants
, ],
aes(x = date, y = value, color=variable)
) +
geom_line() +
geom_line() +
facet_wrap(~code, ncol=1)
grid.arrange(preCOVID, COVID, nrow = 1)
dataAQV$by.day <- date(ymd_hms(dataAQV$date))
group.day <- aggregate(value ~ by.day + code + variable, dataAQV, mean)
preCOVID <- ggplot(data = group.day[group.day$by.day < "2020-03-01" &
group.day$variable %in% pollutants, ],
aes(x = by.day, y = value, color=variable)
) +
geom_line() +
geom_line() +
facet_wrap(~code, ncol=1)
COVID <- ggplot(data = group.day[group.day$by.day > "2020-03-01" &
group.day$variable %in% pollutants, ],
aes(x = by.day, y = value, color=variable)
) +
geom_line() +
geom_line() +
facet_wrap(~code, ncol=1)
grid.arrange(preCOVID, COVID, nrow = 1)
dataAQV$by.month <- round_date(ymd_hms(dataAQV$date), unit="month")
group.mth <- aggregate(value ~ by.month + code + variable, dataAQV, median)
preCOVID <- ggplot(data = group.mth[group.mth$by.month < "2020-03-01" &
group.mth$variable %in% pollutants, ],
aes(x = by.month, y = value, color=variable)
) +
geom_line() +
geom_line() +
facet_wrap(~code, ncol=1)
COVID <- ggplot(data = group.mth[group.mth$by.month > "2020-03-01" &
group.mth$variable %in% pollutants, ],
aes(x = by.month, y = value, color=variable)
) +
geom_line() +
geom_line() +
facet_wrap(~code, ncol=1)
grid.arrange(preCOVID, COVID, nrow = 1)
dataAQV$annual.week <- round_date(ymd_hms(dataAQV$date), unit="weeks")
group.week <- aggregate(value ~ annual.week + code + variable, dataAQV, median)
preCOVID <- ggplot(data = group.week[group.week$annual.week < "2020-03-01" &
group.week$variable %in% pollutants, ],
aes(x = annual.week, y = value, color=variable)
) +
geom_line() +
geom_line() +
facet_wrap(~code, ncol=1)
COVID <- ggplot(data = group.week[group.week$annual.week > "2020-03-01" &
group.week$variable %in% pollutants, ],
aes(x = annual.week, y = value, color=variable)
) +
geom_line() +
geom_line() +
facet_wrap(~code, ncol=1)
grid.arrange(preCOVID, COVID, nrow = 1)
data.lm <- group.week[group.week$code == "es1563a"
& group.week$variable == "o3",][ c("value", "annual.week")]
data.lm["month"] <- seq.int(nrow(data.lm))
head(data.lm)
lm.fit <- lm(value ~ sin(2*pi/(1*52.1429)*month)+cos(2*pi/(1*52.1429)*month),
data=data.lm)
ggplot() +
geom_point(data=data.lm, aes(x=annual.week, y=value), color="red") +
geom_line(aes(x=data.lm$annual.week, y=lm.fit$fitted.values), color="blue")
data.lm$jaime <- data.lm$value - lm.fit$fitted.values
#data.lm$jaime <- data.lm$jaime - min(data.lm$jaime)
ggplot() +
geom_line(data=data.lm, aes(x=annual.week, y=scale(jaime)), color="red")
b <- function(i) {
reslm <- lm(value ~ sin(2*pi/50*annual.week)+cos(2*pi/50*annual.week),
data=group.week.1[group.week.1$variable == i[1]
& group.week.1$code == i[2]
,]
)
ggplot(data=group.week.1[group.week.1$variable == i[1]
& group.week.1$code == i[2]
,]) +
geom_point(aes(x=annual.week, y=value, color=variable)) +
geom_line(aes(x=annual.week, y=reslm$fitted.values, color=variable)) +
facet_wrap(~code, ncol=1)
}
pairs <- mapply(c, pollutants, mapply(rep, levels(group.week.1$code), length(levels(group.week.1$code))), SIMPLIFY = T)
ploted <- lapply(pairs, b)
grid.arrange(grobs=ploted, nrow = 4)
group.est <- aggregate(value ~ code + variable + by.day, group.day, mean)
preCOVID <- ggplot(data = group.est[group.est$by.day < "2020-03-01" &
group.est$variable %in% pollutants, ],
aes(x = by.day, y = value, color=variable)
) +
geom_line() +
geom_line()
COVID <- ggplot(data = group.est[group.est$by.day > "2020-03-01"
& group.est$variable %in% pollutants
, ],
aes(x = by.day, y = value, color=variable)
) +
geom_line() +
geom_line()
grid.arrange(preCOVID, COVID, nrow = 1)
write.csv(dataAQV, "../xlsx/dataAQV.csv")